Multi-omics analysis for samples with matched brain and retina tissue from same subjects¶

  • GLDS-202 v6 and GLDS-203 v8
  • Select subjects that have methylation/RNA-Seq from matched tissue
  • Define promoter regions as +/- 2kb from TSS
  • Define gene methylation level as weighted methylation within promoter of each gene for each sample
  • Option 1: For each group, calculate correlation between RNA-Seq and RRBS counts per gene for each tissue separarely and then intersect between tissues
  • Option 2: For each group, calculate correlation between RNA-Seq or RRBS counts per gene for each assay separately and then intersect between assays
In [261]:
#Load libraries

#General
library(deconvSeq)
library(data.table)
library(plyr)
library(dplyr)
library(tidyverse)
library(ggplot2)
library(DESeq2)
library(matrixStats)
library(ComplexUpset)
library("RColorBrewer")
library(treemapify)
library(ggtree)
library(cowplot)
library(pheatmap)
library(MDSeq)
library(matrixStats)

#Annotation
organism="org.Mm.eg.db"
library(organism, character.only = TRUE)
library("genomation")
library("GenomicRanges")
library("biomaRt")

#GSEA and ORA
library(clusterProfiler)
library(enrichplot)
library(ggnewscale)
library(rrvgo)
In [3]:
#Get gene mappings and biotypes
httr::set_config(httr::config(ssl_verifypeer = FALSE))
mart <- useMart('ENSEMBL_MART_ENSEMBL')
mart <- useDataset('mmusculus_gene_ensembl', mart)
annotLookup <- getBM(
  mart = mart,
  attributes = c(
    'ensembl_gene_id',
    'ensembl_transcript_id',
    'external_gene_name',
    'gene_biotype',
    'entrezgene_id'),
  uniqueRows = TRUE)
In [ ]:
#Read in transcript and CpG island annotations
gene.obj=readTranscriptFeatures("/home/jupyter/MOUNT/references/mm10.GRCm38.96.bed", up.flank = 2000,down.flank = 2000, remove.unusual=TRUE, unique.prom = TRUE)
cpg.obj=readFeatureFlank("/home/jupyter/MOUNT/references/cpgi.mm10.bed",feature.flank.name=c("CpGi","shores"), remove.unusual = TRUE)
bedfile<-fread("/home/jupyter/MOUNT/references/mm10.GRCm38.96.bed",header=F)
bedfile<-subset(bedfile,select=c("V1","V4","V6","V2","V3"))
colnames(bedfile)<-c("chr","feature.name","transStrand","transStart","transStop")

write.table(subset(as.data.frame(gene.obj$introns),score==1),"/home/jupyter/MOUNT/references/mm10.firstintrons.tsv",sep="\t",quote=FALSE,row.names=FALSE)
write.table(subset(as.data.frame(gene.obj$exons),score==1),"/home/jupyter/MOUNT/references/mm10.firstexons.tsv",sep="\t",quote=FALSE,row.names=FALSE)

Define functions¶

In [263]:
## Function for annotating methylation matrix
annotateMeth <- function(data){

    gr <- makeGRangesFromDataFrame(data, keep.extra.columns=FALSE,
                                      start.field="start",
                                      end.field="end")

    #CpG island/shore/other annotation
    cpgAnn=annotateWithFeatureFlank(gr,
                                    cpg.obj$CpGi,cpg.obj$shores,feature.name="CpGi",flank.name="shores")
    cpgMat<-as.data.frame(getMembers(cpgAnn))
    cpgMat$CpG<-ifelse(cpgMat$CpGi==1,"Island",ifelse(cpgMat$shores==1,"Shore","Other"))
    data<-cbind(data,cpgMat)
        
    #Gene annotation
    geneAnn=annotateWithGeneParts(gr,gene.obj)
    geneMat<-cbind(getAssociationWithTSS(geneAnn), as.data.frame(getMembers(geneAnn)))
    data<-cbind(data,geneMat)
    data<-merge(data,bedfile,by=c("chr","feature.name"))
    
    ## Uncomment for adding repeats and enhancer annotations; can also be modified to add first exons/introns
    #    #Repeats annotation
    #    repeatsMat<-as.data.frame(getMembers(annotateWithFeature(gr, readGeneric("/home/jupyter/MOUNT/references/mm10.repeats.tsv", 
    #                                                 header=TRUE,meta.cols=list(RepFamily=5)),feature.name="RepFamily")))
    #    colnames(repeatsMat)<-c("Repeats")
    
    #    #Enhancer annotation
    #    enhMat<-as.data.frame(getMembers(annotateWithFeature(gr, readGeneric("/home/jupyter/MOUNT/references/mm10.enhancers.tsv", 
    #                                                header=TRUE,meta.cols=list(Enhancer=4)),feature.name="Enhancer")))
    #    colnames(enhMat)<-c("Enhancer")
    #    data<-cbind(data,repeatsMat,enhMat)
    
    #Workaround for a bug in genomation (https://github.com/BIMSBbioinfo/genomation/issues/203)
    data$intron<-ifelse((data$start<=data$transStart & data$transStrand=="+"),0,
                    ifelse((data$start>=data$transStop & data$transStrand=="-"),0,data$intron))
    data$Region<-ifelse(data$prom==1,"Promoter",
                        ifelse(data$exon==1, "Exon",  
                               ifelse(data$intron==1,"Intron","Intergenic")))
    #Filter on genomic context and significance
    data<-subset(data,Region!="Intergenic")
    data<-merge(data,annotLookup,by.x=c("feature.name"),by.y=c("ensembl_transcript_id"))
    data
}

## Function for upset plot
createUpsetPlot <- function(data) {
    df<-reshape2::dcast(data, gene~Type, length)
    Groups = colnames(df)[2:3]
    plotUpset<-upset(df, Groups, name='Groups', width_ratio=0.1, min_size=0, min_degree=1, set_sizes=FALSE,
      queries=list(
        upset_query(set='Control', fill='gray'),
        upset_query(set='IR', fill='magenta')
      ),
      themes=upset_default_themes(text=element_text(size=24,face="bold"), 
                                  axis.title=element_text(face="bold",size=26),
                                  panel.border = element_rect(colour = "black", fill=NA, size=1)),
      sort_intersections_by='cardinality',
      base_annotations=list('Size'=(intersection_size(counts=TRUE))),
       matrix=intersection_matrix(geom=geom_point(shape='square filled',size=5)))
     plotUpset
}
In [266]:
#Sample manifest
manifest<-fread("/home/jupyter/MOUNT/integrated/manifest.txt",header=T,sep="\t")
manifest<- manifest %>% dplyr::filter(Brain_rna!="Mmus_C57_6J_BRN_HLU_IRC_4mon_Rep5_M93") %>% 
        dplyr::filter(Tissue=="Both" & Group %in% c("IR","Control"))
manifest[1:2,]
A data.table: 2 × 13
SubjectRepTissueSampleTimepointGroupBrain_methRetina_methBrain_rnaRetina_rnaCountBrain_ageRetina_age
<chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><int><dbl><dbl>
M83Rep1BothHLLC_IRC_4mon_Rep1_M834mControlCFG1778CFG1837Mmus_C57_6J_BRN_HLLC_IRC_4mon_Rep1_M83Mmus_C57_6J_RTN_HLLC_IRC_4mon_Rep1_M83413.33.4
M85Rep2BothHLLC_IRC_4mon_Rep2_M854mControlCFG1780CFG1839Mmus_C57_6J_BRN_HLLC_IRC_4mon_Rep2_M85Mmus_C57_6J_RTN_HLLC_IRC_4mon_Rep2_M85414.1 NA

Process methylation datasets for brain and retina¶

In [ ]:
#Process and merge brain methylation data
samples_202=manifest$Brain_meth
files_202=paste("GLDS-202_Epigenomics_R1_",samples_202,
                "_trimmed.fq_trimmed_bismark_bt2.bismark.cov.gz",sep="")
setwd("/home/jupyter/MOUNT/integrated/bismark/glds202")
methmat_202_all = getmethmat(filnames=files_202, sample.id=samples_202, filtype="bismarkCoverage")
methmat_202_all<-annotateMeth(methmat_202_all)
In [ ]:
#Process and merge retina methylation data
samples_203=manifest$Retina_meth
files_203=paste("GLDS-203_Epigenomics_R1_",samples_203,
                "_trimmed.fq_trimmed_bismark_bt2.bismark.cov.gz",sep="")
setwd("/home/jupyter/MOUNT/integrated/bismark/glds203")
methmat_203_all = getmethmat(filnames=files_203, sample.id=samples_203, filtype="bismarkCoverage")
methmat_203_all<-annotateMeth(methmat_203_all)
In [268]:
colnames(methmat_202_all)
  1. 'feature.name'
  2. 'chr'
  3. 'start'
  4. 'end'
  5. 'strand'
  6. 'coverage1'
  7. 'numCs1'
  8. 'numTs1'
  9. 'coverage2'
  10. 'numCs2'
  11. 'numTs2'
  12. 'coverage3'
  13. 'numCs3'
  14. 'numTs3'
  15. 'coverage4'
  16. 'numCs4'
  17. 'numTs4'
  18. 'coverage5'
  19. 'numCs5'
  20. 'numTs5'
  21. 'coverage6'
  22. 'numCs6'
  23. 'numTs6'
  24. 'coverage7'
  25. 'numCs7'
  26. 'numTs7'
  27. 'coverage8'
  28. 'numCs8'
  29. 'numTs8'
  30. 'coverage9'
  31. 'numCs9'
  32. 'numTs9'
  33. 'CpGi'
  34. 'shores'
  35. 'CpG'
  36. 'target.row'
  37. 'dist.to.feature'
  38. 'feature.strand'
  39. 'prom'
  40. 'exon'
  41. 'intron'
  42. 'transStrand'
  43. 'transStart'
  44. 'transStop'
  45. 'Region'
  46. 'ensembl_gene_id'
  47. 'external_gene_name'
  48. 'gene_biotype'
  49. 'entrezgene_id'
In [269]:
#Only keep sites with >=10 reads in all samples
selectedCols <- methmat_202_all[grep("coverage", names(methmat_202_all), fixed = T)]
methmat_202_all <- methmat_202_all[apply(selectedCols, 1, 
                                   function(x) all(!is.na(x) & x>=10)), ]
selectedCols <- methmat_203_all[grep("coverage", names(methmat_203_all), fixed = T)]
methmat_203_all <- methmat_203_all[apply(selectedCols, 1, 
                                   function(x) all(!is.na(x) & x>=10)), ]
colMeans(methmat_202_all[grep("coverage", names(methmat_202_all), fixed = T)])
colMeans(methmat_203_all[grep("coverage", names(methmat_203_all), fixed = T)])
dim(methmat_202_all)
coverage1
35.6163023662561
coverage2
40.6964631087997
coverage3
34.9839303726599
coverage4
56.5696631994892
coverage5
33.7433694857534
coverage6
118.749006348266
coverage7
39.4002137681495
coverage8
43.6322054210122
coverage9
23.7762502197833
coverage1
34.4304846866137
coverage2
72.3449925397784
coverage3
19.3983587512386
coverage4
34.8676736067609
coverage5
33.9680264012119
coverage6
63.8628999191335
coverage7
39.2379155799041
coverage8
33.5441021537831
coverage9
41.3992756181733
  1. 864488
  2. 49
In [272]:
#Calculate methylation % for all sites/samples
methmat_202<-methmat_202_all
methmat_203<-methmat_203_all

for (i in 1:length(samples_202)){
    numCs <- rlang::sym(paste("numCs", i, sep = ""))
    coverage <- rlang::sym(paste("coverage", i, sep = ""))
    methmat_202<-methmat_202 %>% 
        dplyr::mutate(!!paste0("ratio_",manifest$Brain_meth[i]) := !!numCs/!!coverage)
    methmat_203<-methmat_203 %>% 
        dplyr::mutate(!!paste0("ratio_",manifest$Retina_meth[i]) := !!numCs/!!coverage)
}

#Calculate ratios (per site) and filter sites with sd<0.02
methmat_202 <- methmat_202 %>% mutate(mean = select(., starts_with("ratio_")) %>% rowMeans(na.rm = TRUE),
                                     sd = rowSds(as.matrix(select(.,starts_with("ratio_"))),na.rm = TRUE)) %>%
                               filter(sd > 0.02)
methmat_203 <- methmat_203 %>% mutate(mean = select(., starts_with("ratio_")) %>% rowMeans(na.rm = TRUE),
                                     sd = rowSds(as.matrix(select(.,starts_with("ratio_"))),na.rm = TRUE)) %>%
                               filter(sd > 0.02)
In [273]:
methmat_202[1,]
A data.frame: 1 × 60
feature.namechrstartendstrandcoverage1numCs1numTs1coverage2numCs2⋯ratio_CFG1780ratio_CFG1771ratio_CFG1775ratio_CFG1777ratio_CFG1779ratio_CFG1783ratio_CFG1788ratio_CFG1789meansd
<chr><chr><int><int><chr><int><int><int><int><int>⋯<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
1ENSMUST00000000001chr3108146074108146074*28028110⋯0000.043478260.02352941000.055555560.013618140.02196966
In [274]:
## Uncomment if only degree of hypermethylation needs to be evaluated
#for (i in 1:length(samples_202)){
#    numCs <- rlang::sym(paste("numCs", i, sep = ""))
#    coverage <- rlang::sym(paste("coverage", i, sep = ""))
#    methmat_202<-methmat_202 %>% dplyr::mutate(!!coverage := ifelse(!!sym(numCs)==0,NA,!!sym(coverage)), !!numCs := ifelse(!!sym(numCs)==0,NA,!!sym(numCs)))
#    methmat_203<-methmat_203 %>% dplyr::mutate(!!coverage := ifelse(!!sym(numCs)==0,NA,!!sym(coverage)), !!numCs := ifelse(!!sym(numCs)==0,NA,!!sym(numCs)))
#}

Calculate gene-level methylation using weighted (by coverage) methylation ratio¶

In [415]:
#Calculate weighted methylation for each gene
methmat_202_counts<- methmat_202 %>% dplyr::filter(Region=="Intron") %>% 
                    dplyr::select(c("external_gene_name","Region",contains(c("numCs","coverage")))) %>%
                    dplyr::group_by(external_gene_name,Region) %>%  
                    summarise(across(starts_with(c('numCs', 'coverage')), sum, na.rm=TRUE),.groups = 'drop')
methmat_203_counts<- methmat_203 %>% dplyr::filter(Region=="Intron") %>% 
                    dplyr::select(c("external_gene_name","Region",contains(c("numCs","coverage")))) %>%
                    dplyr::group_by(external_gene_name,Region) %>%  
                    summarise(across(starts_with(c('numCs', 'coverage')), sum, na.rm=TRUE),.groups = 'drop')
for (i in 1:length(samples_202)){
    coverage <- rlang::sym(paste("coverage", i, sep = ""))
    numCs <- rlang::sym(paste("numCs", i, sep = ""))
    methmat_202_counts<-methmat_202_counts %>% mutate(!!paste("ratio",manifest$Brain_meth[i],sep="_") := !!numCs/!!coverage)
    methmat_203_counts<-methmat_203_counts %>% mutate(!!paste("ratio",manifest$Retina_meth[i],sep="_") := !!numCs/!!coverage)
}
In [340]:
#Methylation ratio distribution
library(ggpubr)
df_202 <- as.data.frame(gather
                        (as.data.frame(methmat_202_counts %>% select(c("CpG","Region",contains("ratio_")))),
                        key='key',value='value',-c('CpG','Region')))
df_202$key<-gsub("ratio_","",df_202$key)
df_203 <- as.data.frame(gather
                        (as.data.frame(methmat_203_counts %>% select(c("CpG","Region",contains("ratio_")))),
                        key='key',value='value',-c('CpG','Region')))
df_203$key<-gsub("ratio_","",df_203$key)

df_202<-merge(df_202,manifest,by.x=c("key"),by.y=c("Brain_meth"))
df_202$CpG<-ifelse(df_202$CpG=="Island","CpGi",ifelse(df_202$CpG=="Shore","CpGs","Other"))
df_203<-merge(df_203,manifest,by.x=c("key"),by.y=c("Retina_meth")) 
df_203$CpG<-ifelse(df_203$CpG=="Island","CpGi",ifelse(df_203$CpG=="Shore","CpGs","Other"))

options(repr.plot.width=10, repr.plot.height=20)
plot_grid(ggboxplot(subset(df_202), x = "Group", y = "value",
          color = "Group", outlier.shape = NA) + stat_compare_means(method="t.test") + facet_wrap(CpG~Region),
          ggboxplot(subset(df_203), x = "Group", y = "value",
          color = "Group", outlier.shape = NA) + stat_compare_means(method="t.test") + facet_wrap(CpG~Region),nrow=2)

p3<-ggplot(subset(df_202)) + geom_density(aes(color=paste(Group,Subject),x=value,..scaled..),size=1) + 
xlab("Methylation level") + ylab("Probability density") + 
theme(axis.text = element_text(size = 16), axis.title = element_text(size = 20)) + facet_wrap(CpG~Region)     
p4<-ggplot(subset(df_203)) + geom_density(aes(color=paste(Group,Subject),x=value,..scaled..),size=1) + 
xlab("Methylation level") + ylab("Probability density") + 
theme(axis.text = element_text(size = 16), axis.title = element_text(size = 20)) + facet_wrap(CpG~Region)   

options(repr.plot.width=15, repr.plot.height=15)
plot_grid(p3 + 
          scale_color_manual(values=c("blue","blue","blue","blue","red","red","red","red","red","red"))+ 
          ggtitle("BRAIN") + theme(plot.title = element_text(size=20)),
          p4 + 
          scale_color_manual(values=c("blue","blue","blue","blue","red","red","red","red","red","red"))+
          ggtitle("RETINA") + theme(plot.title = element_text(size=20)),
          nrow=2)
In [355]:
#Compute PCs
matPC<-subset(methmat_203_counts,Region=="Intron")[,grep("ratio", names(methmat_203_counts))]
matPC[is.na(matPC)] <- 0
dfWide.pca <- prcomp(t(matPC), center = TRUE)
dfWide.pca <- as.data.frame(dfWide.pca$x)
dfWide.pca$ID<-gsub("ratio_","",row.names(dfWide.pca))
dfWide.pca<-merge(dfWide.pca,manifest,by.x=c("ID"),by.y=c("Retina_meth"))

#PCA plots
options(repr.plot.width=5, repr.plot.height=5)
retinaPC<-ggplot(
  dfWide.pca,
  aes(x = PC1, y = PC2,color=Group,label=Subject)
  ) + geom_text() +
  scale_shape_manual(values=c(17,16)) +
  geom_hline(yintercept = 0, colour = "gray65") +
  geom_vline(xintercept = 0, colour = "gray65") + theme_classic()

#Compute PCs
matPC<-subset(methmat_202_counts,Region=="Intron")[,grep("ratio", names(methmat_202_counts))]
matPC[is.na(matPC)] <- 0
dfWide.pca <- prcomp(t(matPC), center = TRUE)
dfWide.pca <- as.data.frame(dfWide.pca$x)
dfWide.pca$ID<-gsub("ratio_","",row.names(dfWide.pca))
dfWide.pca<-merge(dfWide.pca,manifest,by.x=c("ID"),by.y=c("Brain_meth"))

#PCA plots
options(repr.plot.width=5, repr.plot.height=5)
brainPC<-ggplot(
  dfWide.pca,
  aes(x = PC1, y = PC2,color=Group,label=Subject)
  ) + geom_text() +
  scale_shape_manual(values=c(17,16)) +
  geom_hline(yintercept = 0, colour = "gray65") +
  geom_vline(xintercept = 0, colour = "gray65") + theme_classic()

options(repr.plot.width=15, repr.plot.height=5)
plot_grid(brainPC,retinaPC,nrow=1)

Process RNA-seq data¶

In [296]:
genesCoding<-unique(as.data.frame(subset(annotLookup, 
                                         gene_biotype=="protein_coding" | grepl("RNA",gene_biotype), 
                                         select=c("ensembl_gene_id"))))
colnames(genesCoding) <-c("gene")

#BRN dataset (GLDS-202)
counts202<-read.delim("/home/jupyter/MOUNT/integrated/GLDS-202_rna_seq_Unnormalized_Counts.csv",sep=",",row.names=1,header=TRUE)
counts202<-counts202 %>% dplyr::filter(!grepl("ERCC",row.names(counts202))) %>% 
            dplyr::select(manifest$Brain_rna)
genesCounts<-as.data.frame(row.names(counts202))
colnames(genesCounts) <- c("gene")
merged<-merge(genesCounts,genesCoding,by=c("gene"))
counts202 <- counts202[merged$gene,]

coldata202<-as.data.frame(colnames(counts202))
colnames(coldata202)<-c("SampleName")
coldata202$Group<-manifest$Group
dds202 <- DESeqDataSetFromMatrix(countData = round(counts202),colData = coldata202,design =~ 1 )
ddsNofilt202 <- estimateSizeFactors(dds202)
keep <- rowSums(counts(ddsNofilt202, normalized=TRUE) >= 5 ) >= 2 & 
    rowSums(counts(ddsNofilt202, normalized=TRUE) == 0 ) <= 4
dds202 <- ddsNofilt202[keep,]
counts202<-log2(as.data.frame(counts(dds202, normalized=TRUE))+1)
counts202$ensembl_gene_id<-rownames(counts202)
counts202<-merge(counts202,unique(subset(annotLookup,select=c("ensembl_gene_id","external_gene_name"))),by=c("ensembl_gene_id"))

#RTN dataset (GLDS-203)
counts203<-read.delim("/home/jupyter/MOUNT/integrated/GLDS-203_rna_seq_Unnormalized_Counts.csv",sep=",",row.names=1,header=TRUE)
counts203<-counts203 %>% filter(!grepl("ERCC",row.names(counts203))) %>%
            dplyr::select(manifest$Retina_rna) 
genesCounts<-as.data.frame(row.names(counts203))
colnames(genesCounts) <-c("gene")
merged<-merge(genesCounts,genesCoding,by=c("gene"))
counts203 <- counts203[merged$gene,]
coldata203<-as.data.frame(colnames(counts203))
colnames(coldata203)<-c("SampleName")
coldata203$Group<-manifest$Group
dds203 <- DESeqDataSetFromMatrix(countData = round(counts203),colData = coldata203,design =~ 1 )

ddsNofilt203 <- estimateSizeFactors(dds203)
keep <- rowSums(counts(ddsNofilt203, normalized=TRUE) >= 5 ) >= 2 & 
    rowSums(counts(ddsNofilt203, normalized=TRUE) == 0 ) <= 4
dds203 <- ddsNofilt203[keep,]
counts203<-log2(as.data.frame(counts(dds203, normalized=TRUE))+1)
counts203$ensembl_gene_id<-rownames(counts203)
counts203<-merge(counts203,unique(subset(annotLookup,select=c("ensembl_gene_id","external_gene_name"))),by=c("ensembl_gene_id"))
dim(counts202)
dim(counts203)
converting counts to integer mode

converting counts to integer mode

  1. 16817
  2. 11
  1. 16890
  2. 11

Within assay: Correlation between brain and retina in each group¶

Within each exposure group, calculate Pearson correlation for each gene promoter (CpGi/s) using brain-retina matched pairs

RRBS¶

In [416]:
#Merge brain and retina data on common genes
mergedMeth<-merge(methmat_202_counts %>% dplyr::select(c("external_gene_name",contains("ratio"))),
                  methmat_203_counts %>% dplyr::select(c("external_gene_name",contains("ratio"))),
                  by=c("external_gene_name"))
mergedMeth[is.na(mergedMeth)] <- 0
In [417]:
#Calculate correlation between brain and retina for each gene within each group
for (group in c("IR","Control")){
    tmp202<-as.matrix(mergedMeth[,paste0("ratio_",subset(manifest,Group==group)$Brain_meth)])
    tmp203<-as.matrix(mergedMeth[,paste0("ratio_",subset(manifest,Group==group)$Retina_meth)])
    corMatTmp<-suppressWarnings(sapply(seq.int(dim(tmp202)[1]), 
           function(i) cor.test(tmp202[i,], tmp203[i,])))
    corMatTmp<-as.data.frame(t(corMatTmp)) %>% dplyr::select(statistic,p.value,estimate,conf.int) %>% 
                                    rename_with( ~ paste0(.x,".",group))
    assign(paste0("corMat_",group),corMatTmp)
}
corMatMeth<-cbind(corMat_IR,corMat_Control)
corMatMeth$gene<-mergedMeth$external_gene_name
In [418]:
#Correlated genes (positive and negative)
posCorMeth<-unique(rbind(as.data.frame(corMatMeth %>% dplyr::filter(p.value.IR<0.05 & estimate.IR!=0) 
                            %>% dplyr::select(gene) %>% mutate(Type="IR")),
              as.data.frame(corMatMeth %>% dplyr::filter(p.value.Control<0.05 & estimate.Control>0) 
                            %>% dplyr::select(gene) %>% mutate(Type="Control"))))
posCorMeth$Class<-"RRBS"

RNA-Seq¶

In [301]:
mergedRna<-merge(counts202,counts203,by="ensembl_gene_id")
for (group in c("IR","Control")){
    tmp202<-as.matrix(t(apply(mergedRna[,subset(manifest,Group==group)$Brain_rna],1,scale)))
    tmp203<-as.matrix(t(apply(mergedRna[,subset(manifest,Group==group)$Retina_rna],1,scale)))
    corMatTmp<-suppressWarnings(sapply(seq.int(dim(tmp202)[1]), 
           function(i) cor.test(tmp202[i,], tmp203[i,])))
    corMatTmp<-as.data.frame(t(corMatTmp)) %>% dplyr::select(statistic,p.value,estimate,conf.int) %>% 
                                    rename_with( ~ paste0(.x,".",group))
    assign(paste0("corMatRna_",group),corMatTmp)
}

corMatRna<-cbind(corMatRna_IR,corMatRna_Control)
corMatRna$ensembl_gene_id<-mergedRna$ensembl_gene_id
corMatRna<-merge(corMatRna,unique(subset(annotLookup,select=c("ensembl_gene_id","external_gene_name"))),by=c("ensembl_gene_id"))
corMatRna <- corMatRna %>% rename(gene=external_gene_name)
In [302]:
#Correlated genes (positive and negative)
posCorRna<-unique(rbind(as.data.frame(corMatRna %>% dplyr::filter(p.value.IR<0.05 & estimate.IR!=0) 
                            %>% dplyr::select(gene) %>% mutate(Type="IR")),
              as.data.frame(corMatRna %>% dplyr::filter(p.value.Control<0.05 & estimate.Control>0) 
                            %>% dplyr::select(gene) %>% mutate(Type="Control"))))
posCorRna$Class<-"RNA-Seq"

Within tissue: Correlation between RNA-seq and RRBS in each group¶

Within each exposure group for each tissue type, calculate Pearson correlation for each gene promoter (CpGi/s) using RRBS-RNA-seq matched pairs

In [419]:
methmat_203_counts[is.na(methmat_203_counts)] <- 0
methmat_202_counts[is.na(methmat_202_counts)] <- 0
merged203<-merge(counts203,methmat_203_counts,by="external_gene_name")
for (group in c("IR","Control")){
    tmpMeth<-as.matrix(merged203[,paste0("ratio_",subset(manifest,Group==group)$Retina_meth)])
    tmpRna<-as.matrix(t(apply(merged203[,subset(manifest,Group==group)$Retina_rna],1,scale)))
    corMat<-suppressWarnings(sapply(seq.int(dim(tmpMeth)[1]), 
           function(i) cor.test(tmpMeth[i,], tmpRna[i,])))
    corMat<-as.data.frame(t(corMat)) %>% dplyr::select(statistic,p.value,estimate,conf.int) %>% 
                                    rename_with( ~ paste0(.x,".",group))
    assign(paste0("corMatRetina_",group),corMat)
}
                                    
merged202<-merge(counts202,methmat_202_counts,by="external_gene_name")
for (group in c("IR","Control")){
    samples_rna<-subset(manifest,Group==group)$Brain_rna
    samples_meth<-subset(manifest,Group==group)$Brain_meth
    tmpMeth<-as.matrix(merged202[,paste0("ratio_",samples_meth)])
    tmpRna<-as.matrix(t(apply(merged202[,samples_rna],1,scale)))
    corMat<-suppressWarnings(sapply(seq.int(dim(tmpMeth)[1]), 
           function(i) cor.test(tmpMeth[i,], tmpRna[i,])))
    corMat<-as.data.frame(t(corMat)) %>% dplyr::select(statistic,p.value,estimate,conf.int) %>% 
                                    rename_with( ~ paste0(.x,".",group))
    assign(paste0("corMatBrain_",group),corMat)
}
In [420]:
corMatBrain<-cbind(corMatBrain_IR,corMatBrain_Control)
corMatRetina<-cbind(corMatRetina_IR,corMatRetina_Control)
corMatBrain$gene<-merged202$external_gene_name
corMatRetina$gene<-merged203$external_gene_name
dim(corMatBrain)
dim(corMatRetina)
  1. 4957
  2. 9
  1. 4372
  2. 9
In [421]:
#Correlated between RRBS and RNA-Seq
posCorRetina<-unique(rbind(as.data.frame(corMatRetina %>% dplyr::filter(p.value.IR<0.05 & estimate.IR!=0) 
                            %>% dplyr::select(gene) %>% mutate(Type="IR")),
              as.data.frame(corMatRetina %>% dplyr::filter(p.value.Control<0.05 & estimate.Control>0) 
                            %>% dplyr::select(gene) %>% mutate(Type="Control"))))
posCorRetina$Class<-"RTN"
In [422]:
#Correlated between RRBS and RNA-Seq
posCorBrain<-unique(rbind(as.data.frame(corMatBrain %>% dplyr::filter(p.value.IR<0.05 & estimate.IR!=0) 
                            %>% dplyr::select(gene) %>% mutate(Type="IR")),
              as.data.frame(corMatBrain %>% dplyr::filter(p.value.Control<0.05 & estimate.Control>0) 
                            %>% dplyr::select(gene) %>% mutate(Type="Control"))))
posCorBrain$Class<-"BRN"
In [423]:
allCor<-rbind(posCorMeth,posCorRna,posCorBrain,posCorRetina)
allCor$Type<-paste(allCor$Class,allCor$Type,sep=" ")

    df<-reshape2::dcast(allCor, gene~Type, length)
    df[1,]
    #Groups = colnames(df)[2:9]
    Groups=c("RTN Control","BRN Control","RNA-Seq Control","RRBS Control","RTN IR","BRN IR","RNA-Seq IR","RRBS IR")
    plotUpset2<-upset(df, Groups, name='Groups', width_ratio=0.1, min_size=0, min_degree=1, set_sizes=FALSE, sort_sets=FALSE,
      queries=list(
        upset_query(set='RNA-Seq Control', fill='lightblue'),
        upset_query(set='RNA-Seq IR', fill='orange'),
        upset_query(set='RRBS Control', fill='blue'),
        upset_query(set='RRBS IR', fill='brown'),
        upset_query(set='RTN Control', fill='darkgrey'),
        upset_query(set='RTN IR', fill='red'),
        upset_query(set='BRN Control', fill='cyan'),
        upset_query(set='BRN IR', fill='magenta')
      ),
      themes=upset_default_themes(text=element_text(size=20,face="bold"), 
                                  axis.title=element_text(face="bold",size=20),
                                  panel.border = element_rect(colour = "black", fill=NA, size=1)),
      #sort_intersections_by='cardinality',
      base_annotations=list('Size'=(intersection_size(counts=TRUE))),
       matrix=intersection_matrix(geom=geom_point(shape='square filled',size=5)))
options(repr.plot.width=20, repr.plot.height=10)
     plotUpset2 + ggtitle("Genes correlated between expression and methylation within each tissue OR between brain and retina within each assay") + theme(plot.title = element_text(size=20))
dfSum = df %>% mutate(sum = select(., !starts_with("gene") & !contains("Control")) %>% rowSums(na.rm = TRUE))
Using Class as value column: use value.var to override.

A data.frame: 1 × 9
geneBRN ControlBRN IRRNA-Seq ControlRNA-Seq IRRRBS ControlRRBS IRRTN ControlRTN IR
<chr><int><int><int><int><int><int><int><int>
11110002E22Rik00000010
In [426]:
genes<-subset(dfSum, `RTN IR`==1 &
              `RTN Control`==0 & `BRN Control`==0 & `RRBS Control`==0 & `RNA-Seq Control`==0,
                select=c("gene"))
genes$gene
  1. '1700022N22Rik'
  2. '4930439D14Rik'
  3. '6430548M08Rik'
  4. '8430429K09Rik'
  5. 'Acr'
  6. 'Acvr1'
  7. 'Adamts2'
  8. 'Agfg2'
  9. 'Agpat5'
  10. 'Aldh1l2'
  11. 'Ankrd17'
  12. 'Apcdd1'
  13. 'Ar'
  14. 'Arfgap3'
  15. 'Arhgap26'
  16. 'Arhgap33'
  17. 'Arid3a'
  18. 'Asb3'
  19. 'Atp2a3'
  20. 'Atp8a2'
  21. 'Atp8b1'
  22. 'B3galnt1'
  23. 'Bcl2l11'
  24. 'Bdh2'
  25. 'Brd1'
  26. 'Brd7'
  27. 'C230057M02Rik'
  28. 'Cacna1i'
  29. 'Cbx2'
  30. 'Ccdc190'
  31. 'Ccdc88b'
  32. 'Ccnk'
  33. 'Ccser1'
  34. 'Cd47'
  35. 'Cdk14'
  36. 'Cds2'
  37. 'Cfap91'
  38. 'Chn1'
  39. 'Chrac1'
  40. 'Chrna2'
  41. 'Cldn10'
  42. 'Cntn6'
  43. 'Col5a1'
  44. 'Cr1l'
  45. 'Ctnna2'
  46. 'Cux2'
  47. 'Cyfip1'
  48. 'D430019H16Rik'
  49. 'Dcbld1'
  50. 'Ddx59'
  51. 'Dennd1b'
  52. 'Dhcr24'
  53. 'Dhtkd1'
  54. 'Dipk2a'
  55. 'Dmd'
  56. 'Dpy19l1'
  57. 'Edn3'
  58. 'Efnb3'
  59. 'Ehd1'
  60. 'Elfn1'
  61. 'Endou'
  62. 'Espn'
  63. 'Eya3'
  64. 'Fam131c'
  65. 'Fam174b'
  66. 'Fam219a'
  67. 'Fam78b'
  68. 'Fars2'
  69. 'Fbln5'
  70. 'Fbrsl1'
  71. 'Fbxl19'
  72. 'Fgf5'
  73. 'Fkbp9'
  74. 'Fndc3b'
  75. 'Fntb'
  76. 'Frs2'
  77. 'Galnt14'
  78. 'Galntl6'
  79. 'Gata3'
  80. 'Gcn1'
  81. 'Gfm2'
  82. 'Gm1110'
  83. 'Gm39244'
  84. 'Gm44503'
  85. 'Gm6288'
  86. 'Gm9869'
  87. 'Grhl1'
  88. 'Grin2a'
  89. 'Grm8'
  90. 'Gucy1a2'
  91. 'Hrk'
  92. 'Hs3st3b1'
  93. 'Ibtk'
  94. 'Icmt'
  95. 'Ift27'
  96. 'Igfbp5'
  97. 'Ip6k3'
  98. 'Ipmk'
  99. 'Itga2'
  100. 'Jade1'
  101. 'Kctd17'
  102. 'Kirrel3os'
  103. 'Klhl36'
  104. 'Kndc1'
  105. 'Larp7'
  106. 'Lekr1'
  107. 'Lgals9'
  108. 'Lgmn'
  109. 'Lhx2'
  110. 'Lingo1'
  111. 'Lingo2'
  112. 'Lrrfip1'
  113. 'Macf1'
  114. 'Mad1l1'
  115. 'Map3k10'
  116. 'Map3k14'
  117. 'Mboat1'
  118. 'Mecom'
  119. 'Meis2'
  120. 'Micall2'
  121. 'Mlana'
  122. 'Mmab'
  123. 'Mmp28'
  124. 'Mocos'
  125. 'Mospd3'
  126. 'Mro'
  127. 'Mtmr3'
  128. 'Myh14'
  129. 'Nedd4l'
  130. 'Nfkbib'
  131. 'Nifk'
  132. 'Nmur1'
  133. 'Ntf3'
  134. 'Ntrk2'
  135. 'Otud3'
  136. 'Pan3'
  137. 'Pcif1'
  138. 'Pcsk1n'
  139. 'Pdia5'
  140. 'Pdzd7'
  141. 'Phaf1'
  142. 'Phf24'
  143. 'Pi4ka'
  144. 'Pkhd1'
  145. 'Pla2g4e'
  146. 'Plcxd3'
  147. 'Plekha7'
  148. 'Podxl'
  149. 'Ptpn11'
  150. 'Rab42'
  151. 'Rad9b'
  152. 'Rap1gap2'
  153. 'Rcl1'
  154. 'Ripor2'
  155. 'Rlf'
  156. 'Rnf144a'
  157. 'Rnf157'
  158. 'Rnf165'
  159. 'Rpl9'
  160. 'Rtkn2'
  161. 'Ryr1'
  162. 'Ryr3'
  163. 'Scamp5'
  164. 'Sccpdh'
  165. 'Scg5'
  166. 'Scn1a'
  167. 'Scnn1a'
  168. 'Scube3'
  169. 'Scyl2'
  170. 'Sdhc'
  171. 'Secisbp2'
  172. 'Sesn1'
  173. 'Sh3gl2'
  174. 'Slc16a7'
  175. 'Slc19a3'
  176. 'Slc20a2'
  177. 'Slc23a2'
  178. 'Slc25a48'
  179. 'Slc38a1'
  180. 'Slit3'
  181. 'Smap1'
  182. 'Smoc2'
  183. 'Snx9'
  184. 'Socs5'
  185. 'Sox6'
  186. 'Spock1'
  187. 'Sqle'
  188. 'Sugct'
  189. 'Synj2'
  190. 'Tasor2'
  191. 'Tcf25'
  192. 'Ticam1'
  193. 'Tm9sf3'
  194. 'Tmem119'
  195. 'Tmem204'
  196. 'Tmem59l'
  197. 'Tnfaip8'
  198. 'Tpcn2'
  199. 'Trim29'
  200. 'Trim8'
  201. 'Triobp'
  202. 'Trmt5'
  203. 'Trpc7'
  204. 'Ttc27'
  205. 'Ttc8'
  206. 'Tti1'
  207. 'Ttyh3'
  208. 'Txnrd1'
  209. 'Upf2'
  210. 'Veph1'
  211. 'Wdfy1'
  212. 'Wwc1'
  213. 'Wwox'
  214. 'Xkr7'
  215. 'Xrcc4'
  216. 'Zcchc24'
  217. 'Zfp951'
  218. 'Zfp993'
  219. 'Zgrf1'
  220. 'Zmynd12'
  221. 'Zscan20'
  222. 'Zswim6'
In [427]:
options(repr.plot.width=12, repr.plot.height=10)
retinaEgo <- enrichGO(gene  =  as.character(unique(as.vector(genes$gene))),
                OrgDb         = org.Mm.eg.db,
                keyType       = 'SYMBOL',
                minGSSize     = 5,maxGSSize= 500,
                #universe      = as.character(unique(as.vector(bkgGenes$ensembl_gene_id))),
                ont           = "ALL",
                pAdjustMethod = "BH",
                pvalueCutoff  = 0.25,
                qvalueCutoff  = 0.25)
In [428]:
plot_grid(dotplot(brainEgo, split="ONTOLOGY") + facet_grid(ONTOLOGY~., scale="free"),
         dotplot(retinaEgo, split="ONTOLOGY") + facet_grid(ONTOLOGY~., scale="free"))
         
dim(retinaEgo)
dim(brainEgo)
all<-rbind(subset(as.data.frame(retinaEgo) %>% mutate(tissue="RTN"),`p.adjust`<0.25),
           subset(as.data.frame(brainEgo) %>% mutate(tissue="BRN"),`p.adjust`<0.25))
ddply(all,.(tissue,ONTOLOGY),nrow)
tmpAll<-ddply(all,.(Description),nrow)
all<-merge(tmpAll,all,by=c("Description"))
  1. 1385
  2. 10
  1. 211
  2. 10
A data.frame: 6 × 3
tissueONTOLOGYV1
<chr><chr><int>
BRNBP 61
BRNCC123
BRNMF 27
RTNBP958
RTNCC169
RTNMF258
In [454]:
dmls202<-fread("/home/jupyter/MOUNT/integrated/GLDS-202_dml.tsv",header=T) %>% dplyr::filter(Type=="4m_IR")
dmls203<-fread("/home/jupyter/MOUNT/integrated/GLDS-203_dml.tsv",header=T) %>% dplyr::filter(Type=="4m_IR")
dmls202[1,]
dmls203[1,]
ddply(dmls202,.(Region),nrow)
ddply(dmls203,.(Region),nrow)
sort(unique(subset(dmls202,qvalue<0.05 & abs(meth.diff)>5 & Region=="Gene_body" & !grepl("Rik|Gm",external_gene_name))$external_gene_name))
A data.table: 1 × 19
feature.nametarget.rowdist.to.featurefeature.strandchrstartendstrandpvalueqvaluemeth.diffpromexonintronRegionTypeensembl_gene_idexternal_gene_namegene_biotype
<chr><int><int><chr><chr><int><int><chr><dbl><dbl><dbl><int><int><int><chr><chr><chr><chr><chr>
ENSMUST00000000094553969+chr2156721137156721137*0.0001002670.03768793-1.290323101Promoter4m_IRENSMUSG00000061689Dlgap4protein_coding
A data.table: 1 × 18
chrfeature.nametarget.rowdist.to.featurefeature.strandstartendstrandpvalueqvaluemeth.diffTypeensembl_gene_idexternal_gene_namegene_biotypetransStrandtransStarttransStop
<chr><chr><int><int><chr><int><int><chr><dbl><dbl><dbl><chr><chr><chr><chr><chr><int><int>
chr1ENSMUST000000270831749765+6536102065361020*7.813858e-060.02808006-4.5951864m_IRENSMUSG00000025946Pth2rprotein_coding+6531125665389244
A data.frame: 3 × 2
RegionV1
<chr><int>
Gene_body 1389
Intergenic 948
Promoter 7373
Error in FUN(X[[i]], ...): object 'Region' not found
Traceback:

1. ddply(dmls203, .(Region), nrow)
2. splitter_d(.data, .variables, drop = .drop)
3. eval.quoted(.variables, data)
4. lapply(exprs, eval, envir = envir, enclos = enclos)
5. FUN(X[[i]], ...)
6. FUN(X[[i]], ...)
In [327]:
tmp<-subset(all,V1==2 & ONTOLOGY=="BP" & tissue=="RTN")

simMatrix <- calculateSimMatrix(tmp$ID,
                                orgdb="org.Mm.eg.db",
                                ont="BP",
                                method="Rel")
    scores <- setNames(-log10(tmp$`p.adjust`), tmp$ID)
    reducedTerms_Combined <- reduceSimMatrix(simMatrix,
                                scores,
                                orgdb="org.Mm.eg.db")
preparing gene to GO mapping data...

preparing IC data...

Warning message in calculateSimMatrix(tmp$ID, orgdb = "org.Mm.eg.db", ont = "BP", :
“Removed 1 terms that were not found in orgdb for BP”
In [433]:
subset(all,V1==2 & ONTOLOGY=="BP" & tissue=="BRN")$Description
subset(all, grepl("radiation|methylation",Description))
subset(all, grepl("Mgmt",geneID))
  1. 'action potential propagation'
  2. 'axonogenesis'
  3. 'central nervous system neuron differentiation'
  4. 'neuron migration'
  5. 'neuronal action potential propagation'
  6. 'positive regulation of smooth muscle cell proliferation'
A data.frame: 10 × 12
DescriptionV1ONTOLOGYIDGeneRatioBgRatiopvaluep.adjustqvaluegeneIDCounttissue
<chr><int><chr><chr><chr><chr><dbl><dbl><dbl><chr><int><chr>
492histone H3-K4 methylation 1BPGO:00515682/21461/29008 0.0746870990.24624260.2032980Gata3/Rlf 2RTN
493histone H3-K4 monomethylation 1BPGO:00976921/2149/29008 0.0644781930.23227780.1917686Rlf 1RTN
607macromolecule methylation 1BPGO:00434146/214314/290080.0295359630.21152020.1746311Gata3/Icmt/Larp7/Pcif1/Rlf/Trmt5 6RTN
642methylation 1BPGO:00322597/214363/290080.0186312670.17215670.1421326Gata3/Icmt/Larp7/Mecom/Pcif1/Rlf/Trmt5 7RTN
978positive regulation of DNA methylation-dependent heterochromatin assembly1BPGO:00903092/19911/29008 0.0024726420.18809860.1706657Dnmt1/Pphln1 2BRN
1214regulation of DNA demethylation 1BPGO:19015351/2148/29008 0.0575233650.22601750.1866001Gata3 1RTN
1215regulation of DNA methylation-dependent heterochromatin assembly 1BPGO:00903082/19914/29008 0.0040360790.19413540.1761430Dnmt1/Pphln1 2BRN
1367response to ionizing radiation 1BPGO:00102124/214133/290080.0171468840.16599520.1370457Eya3/Gata3/Rad9b/Xrcc4 4RTN
1373response to radiation 1BPGO:00093148/214430/290080.0149439730.15694970.1295777Atp8a2/Cds2/Eya3/Gata3/Grin2a/Meis2/Rad9b/Xrcc48RTN
1384RNA methylation 1BPGO:00015103/21470/29008 0.0150859600.15694970.1295777Larp7/Pcif1/Trmt5 3RTN
A data.frame: 0 × 12
DescriptionV1ONTOLOGYIDGeneRatioBgRatiopvaluep.adjustqvaluegeneIDCounttissue
<chr><int><chr><chr><chr><chr><dbl><dbl><dbl><chr><int><chr>
In [328]:
options(repr.plot.width=10, repr.plot.height=10)
ggplot(reducedTerms_Combined, aes(area = score, subgroup = parentTerm, fill = parentTerm, label = term)) + 
  geom_treemap(size=0.5, col='white') +
  geom_treemap_text(col='gray',place="center",reflow=TRUE) +
  geom_treemap_subgroup_border(col='white',size=1) +
  geom_treemap_subgroup_text(col='white',place="center",fontface="bold",size=12,reflow=TRUE,grow=TRUE,min.size=1) + theme(plot.title = element_text(hjust=0.5,size=10)) +
  guides(fill=F)
Warning message:
“The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
of ggplot2 3.3.4.”
In [437]:
egoDf<-subset(as.data.frame(all),ONTOLOGY=="BP" & Count>=5 & `p.adjust`<0.25)
egoDf<-subset(egoDf,select=c("Description","geneID")) %>% mutate(geneID = strsplit(as.character(geneID), "/")) %>% unnest(geneID)
tmp<-ddply(egoDf,.(geneID),nrow)
egoDf<-merge(egoDf,tmp,by=c("geneID"))
egoDf$geneID <- reorder(egoDf$geneID, -egoDf$V1)
egoDf$Description <- reorder(egoDf$Description, egoDf$V1)
options(repr.plot.width=35, repr.plot.height=12)
ggplot(egoDf[order(egoDf$V1),], aes(x=geneID, y=Description)) + 
    geom_tile(fill="darkgreen") + 
    theme(axis.text.x = element_text(angle = -90, hjust = 0),text=element_text(size=30)) + 
    scale_fill_discrete(guide=F) + ylab("Biological process") + xlab("Gene")
In [204]:
setwd("/home/jupyter/glds202_203/Pathways")
cases<-c("IR")
for (j in 1:length(cases)){
    title<-cases[j]
    tmpData<-subset(as.data.frame(corMatMeth),select=c(paste0("statistic.",title),paste0("p.value.",title),"gene"))
    colnames(tmpData)<-c("estimate","pvalue","gene")
    tmpData<-subset(tmpData,!is.na(estimate) & estimate!=Inf & estimate!= -Inf)
    #tmpData$stat<-sign(as.numeric(tmpData$estimate))* -log10(as.numeric(tmpData$pvalue))
    tmpData$stat<-as.numeric(tmpData$estimate)
    tmpData<-subset(tmpData,select=c("stat","gene"))
    tmpData<-subset(tmpData,!is.na(stat))

    rankMetric <- as.numeric(tmpData$stat)
    names(rankMetric) <- tmpData$gene
    rankMetric <- sort(rankMetric, decreasing = TRUE)
    
    ####GO####
    gseaGO <- gseGO(geneList=rankMetric,
                                     ont ="BP",
                                     keyType="SYMBOL",
                                     pvalueCutoff = 0.25,
                                     by = "fgsea",
                                     pAdjustMethod = "BH",
                                     OrgDb = org.Mm.eg.db)
    if(dim(gseaGO)[1]>0){
            gseaRes<-data.frame(gseaGO)
            gseaRes$Dataset<-"RRBS"
            gseaRes$Group<-title
            write.table(gseaRes,paste0("RRBS_",title,"_clusterProfiler.GO.BP.all.symbol.tsv"),sep="\t",quote=FALSE,row.names=F)
            }
    }
preparing geneSet collections...

GSEA analysis...

Warning message in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, :
“There are ties in the preranked stats (0.14% of the list).
The order of those tied genes will be arbitrary, which may produce unexpected results.”
no term enriched under specific pvalueCutoff...

In [209]:
min(tmpData$stat)
-15.4188874652002
In [195]:
corMatMeth[1:3,]
A data.frame: 3 × 9
statistic.IRp.value.IRestimate.IRconf.int.IRstatistic.Controlp.value.Controlestimate.Controlconf.int.Controlgene
<list><list><list><list><list><list><list><list><chr>
1-0.12195990.9106417-0.07023966-0.8969236, 0.8656726-1.9750460.186942-0.813058-0.9959165, 0.6772293
2-0.50735960.6468486-0.281112-0.9321825, 0.79942410.45980380.69080180.3091982-0.9275152, 0.97927710610009B22Rik
3-0.62992630.5734523-0.341786-0.9404601, 0.7738243-1.105460.3841465-0.6158535-0.9906097, 0.84593030610009E02Rik
In [ ]:
simMatrix <- calculateSimMatrix(as.data.frame(gseRRBS)$ID,
                                orgdb="org.Mm.eg.db",
                                ont="BP",
                                method="Rel")
    scores <- setNames(-log10(as.data.frame(gseRRBS)$`p.adjust`), as.data.frame(gseRRBS)$ID)
    reducedTerms_Combined <- reduceSimMatrix(simMatrix,
                                scores,
                                orgdb="org.Mm.eg.db")
preparing gene to GO mapping data...

preparing IC data...

In [ ]:
options(repr.plot.width=10, repr.plot.height=10)
ggplot(reducedTerms_Combined, aes(area = score, subgroup = parentTerm, fill = parentTerm, label = term)) + 
  geom_treemap(size=0.5, col='white') +
  geom_treemap_text(col='gray',place="center",reflow=TRUE) +
  geom_treemap_subgroup_border(col='white',size=1) +
  geom_treemap_subgroup_text(col='white',place="center",fontface="bold",size=12,reflow=TRUE,grow=TRUE,min.size=1) + theme(plot.title = element_text(hjust=0.5,size=10)) +
  guides(fill=F)
In [205]:
corTissue<-rbind(posCorRetina %>% mutate(Sign="pos"), negCorRetina %>% mutate(Sign="neg"),
                 posCorBrain %>% mutate(Sign="pos"), negCorBrain %>% mutate(Sign="neg"))

corAssay<-rbind(posCorMeth %>% mutate(Sign="pos"), negCorMeth %>% mutate(Sign="neg"),
                 posCorRna %>% mutate(Sign="pos"), negCorRna %>% mutate(Sign="neg"))
corTissue[1,]
subset(ddply(unique(subset(corTissue,Type=="IR",select=c("gene","tissue"))), .(gene), nrow),V1>1)$gene
subset(ddply(unique(subset(corTissue,Type=="Control",select=c("gene","tissue"))), .(gene), nrow),V1>1)$gene
A data.frame: 1 × 4
geneTypetissueSign
<chr><chr><chr><chr>
11700029J07RikIRRTNpos
  1. '5033430I15Rik'
  2. 'Arrdc3'
  3. 'Atad2b'
  4. 'Avpi1'
  5. 'Eln'
  6. 'Fbxo21'
  7. 'Fgf11'
  8. 'Fktn'
  9. 'G3bp2'
  10. 'Gabrg3'
  11. 'Gm28778'
  12. 'Il20ra'
  13. 'Lrrc28'
  14. 'Mfsd9'
  15. 'Mgmt'
  16. 'Mid1'
  17. 'Mrc2'
  18. 'Napa'
  19. 'Ncs1'
  20. 'Nsmce1'
  21. 'Ppm1e'
  22. 'Rmc1'
  23. 'Rpl8'
  24. 'Sgsh'
  25. 'Stim1'
  26. 'Sufu'
  27. 'Tmem39b'
  28. 'Tufm'
  29. 'Uba3'
  30. 'Ulk3'
  31. 'Wrap53'
  32. 'Yipf5'
  33. 'Zfp703'
  34. 'Zfp787'
  1. '1700030C10Rik'
  2. 'Abracl'
  3. 'Adam23'
  4. 'Adar'
  5. 'Agbl2'
  6. 'Anapc15'
  7. 'Btc'
  8. 'Ctnnal1'
  9. 'Exosc8'
  10. 'Fbxo6'
  11. 'Gatd1'
  12. 'Gm4673'
  13. 'Gnat1'
  14. 'Hdgf'
  15. 'Itga8'
  16. 'Kcnip1'
  17. 'Kcnk1'
  18. 'Lrp1b'
  19. 'Mak16'
  20. 'Mta3'
  21. 'Pex11a'
  22. 'Phactr1'
  23. 'Phc2'
  24. 'Pik3cd'
  25. 'Ppcs'
  26. 'Sipa1l3'
  27. 'Socs2'
  28. 'St6galnac2'
  29. 'Tlk1'
  30. 'Tmem18'
  31. 'Trappc6b'
  32. 'Zbtb46'
  33. 'Zfp768'
In [363]:
diffVarRetina<-fread("/home/jupyter/glds202_203/DiffVar/RTN_IR_vs_Control.tsv",header=T,sep="\t")
sort(subset(diffVarRetina,
       external_gene_name %in% c(subset(posCorRetina,Type=="IR")$gene) & 
       (FDR.dispersion<0.5 | FDR.mean<0.5))$external_gene_name)
  1. 'E230016M11Rik'
  2. 'Galc'
  3. 'Hsp90b1'
  4. 'Zfp606'
In [ ]:
tmp1<-subset(counts202,grepl("Cul1",external_gene_name))
tmp2<-subset(counts203,grepl("Cul1",external_gene_name))
In [ ]:
options(repr.plot.width=10, repr.plot.height=8)
tmp1Long<-melt(subset(tmp1,select=-c(ensembl_gene_id))) %>%
            mutate(group=ifelse(grepl("HLLC_IRC",variable),"Control",ifelse(grepl("HLLC_IR_",variable),"IR",
                                                                            ifelse(grepl("HLU_IRC",variable),"HLU","Combined"))),
                  timepoint=ifelse(grepl("4mon",variable),"4m",ifelse(grepl("1mon",variable),"1m","7d")))
ggplot(tmp1Long) + geom_boxplot(aes(x=group,y=value,color=group)) + xlab("gene") + ylab("Normalized counts") + 
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + facet_wrap(.~external_gene_name,scale="free") + ggtitle("Brain")

tmp2Long<-melt(subset(tmp2,select=-c(ensembl_gene_id))) %>%
            mutate(group=ifelse(grepl("HLLC_IRC",variable),"Control",ifelse(grepl("HLLC_IR_",variable),"IR",
                                                                            ifelse(grepl("HLU_IRC",variable),"HLU","Combined"))),
                  timepoint=ifelse(grepl("4mon",variable),"4m",ifelse(grepl("1mon",variable),"1m","7d")))
ggplot(tmp2Long) + geom_boxplot(aes(x=group,y=value,color=group)) + xlab("gene") + ylab("Normalized counts") + 
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + facet_wrap(.~external_gene_name,scale="free") + ggtitle("Retina")
Warning message in melt(subset(tmp1, select = -c(ensembl_gene_id))):
“The melt generic in data.table has been passed a data.frame and will attempt to redirect to the relevant reshape2 method; please note that reshape2 is deprecated, and this redirection is now deprecated as well. To continue using melt methods from reshape2 while both libraries are attached, e.g. melt.list, you can prepend the namespace like reshape2::melt(subset(tmp1, select = -c(ensembl_gene_id))). In the next version, this warning will become an error.”
Using external_gene_name as id variables

Warning message in melt(subset(tmp2, select = -c(ensembl_gene_id))):
“The melt generic in data.table has been passed a data.frame and will attempt to redirect to the relevant reshape2 method; please note that reshape2 is deprecated, and this redirection is now deprecated as well. To continue using melt methods from reshape2 while both libraries are attached, e.g. melt.list, you can prepend the namespace like reshape2::melt(subset(tmp2, select = -c(ensembl_gene_id))). In the next version, this warning will become an error.”
Using external_gene_name as id variables

In [ ]:
diffVarRetina<-fread("/home/jupyter/glds202_203/DiffVar/RTN_IR_vs_Control.tsv",header=T,sep="\t")
diffVarRetina<-subset(diffVarRetina,select=c("external_gene_name"), FDR.dispersion<0.1)
corGeneListPos<-subset(corMatRetina, `estimate.IR`>0)$gene
corGeneListNeg<-subset(corMatRetina, `estimate.IR`<0)$gene
manifestIR<-subset(manifest,Group=="IR")

for (i in 1:dim(subset(manifestIR))[1]){
    tmpMeth<-as.matrix(merged203[,paste0("ratio_",manifestIR$Retina_meth[i])])
    tmpRna<-as.matrix(t(apply(merged203[,manifestIR$Retina_rna[i]],1,scale)))
    tmp<-subset(merged203,select=c(subset(manifestIR)$Retina_rna[i],
                                   paste0("ratio_",subset(manifest)$Retina_meth[i])),
                     external_gene_name =="Babam1")
    colnames(tmp)<-c("Brain","Retina")
    if(i==1){
        longRna<-tmp
    }
    else{
        longRna<-rbind(longRna,tmp)
    }
}
ggplot(longRna,aes(x=Brain,y=Retina)) + geom_point(size=5) + geom_smooth(method="lm")
In [233]:
manifest[1,]
A data.table: 1 × 13
SubjectRepTissueSampleTimepointGroupBrain_methRetina_methBrain_rnaRetina_rnaCountBrain_ageRetina_age
<chr><chr><chr><chr><chr><chr><chr><chr><chr><chr><int><dbl><dbl>
M83Rep1BothHLLC_IRC_4mon_Rep1_M834mControlCFG1778CFG1837Mmus_C57_6J_BRN_HLLC_IRC_4mon_Rep1_M83Mmus_C57_6J_RTN_HLLC_IRC_4mon_Rep1_M83413.33.4
In [ ]:
#Network of genes with correlation and differential expression/variability
library("STRINGdb")
string_db <- STRINGdb$new(version="11.5", species=10090,score_threshold=400,input_directory="")

diffVarBrain<-fread("/home/jupyter/glds202_203/DiffVar/BRN_IR_vs_Control.tsv",header=T,sep="\t")
diffVarBrain<-subset(diffVarBrain,select=c("external_gene_name"), FDR.dispersion<1 | FDR.mean<1)
diffVarBrain<-unique(subset(diffVarBrain,
       external_gene_name %in% c(subset(posCorBrain,Type=="Control")$gene,
                                subset(negCorBrain,Type=="Control")$gene,
                                subset(posCorBrain,Type=="IR")$gene,
                                subset(negCorBrain,Type=="IR")$gene),
    select=c("external_gene_name")))
length(diffVarBrain$external_gene_name)
cat(paste0(sort(diffVarBrain$external_gene_name),sep=","))
cat("\n")

diffVarRetina<-fread("/home/jupyter/glds202_203/DiffVar/RTN_IR_vs_Control.tsv",header=T,sep="\t")
diffVarRetina<-subset(diffVarRetina,select=c("external_gene_name"), FDR.dispersion<1 | FDR.mean<1)
diffVarRetina<-unique(subset(diffVarRetina,
       external_gene_name %in% c(subset(posCorRetina,Type=="Control")$gene,
                                subset(negCorRetina,Type=="Control")$gene,
                                subset(posCorRetina,Type=="IR")$gene,
                                subset(negCorRetina,Type=="IR")$gene),
    select=c("external_gene_name")))
length(diffVarRetina$external_gene_name)
cat(paste0(sort(diffVarRetina$external_gene_name),sep=","))

df<-merge(diffVarBrain,diffVarRetina,by=c("external_gene_name"))
colnames(df)<-c("gene")

df<-diffVarBrain
colnames(df)<-c("gene")
diffVarBrain$group<-"BRN_IR"
diffVarRetina$group<-"RTN_IR"
df<-rbind(diffVarBrain,diffVarRetina)
In [222]:
tmp<-subset(df, group=="BRN_IR")
tmp$color<-ifelse(tmp$external_gene_name %in% subset(negCorBrain,Type=="IR")$gene, "blue",
                 ifelse(tmp$external_gene_name %in% subset(posCorBrain,Type=="IR")$gene, "red", "gray"))
mapped = string_db$map(tmp, "external_gene_name",removeUnmappedRows = TRUE )
hits <- mapped$STRING_id
In [223]:
# post payload information to the STRING server
options(repr.plot.width=10, repr.plot.height=10)
payload_id <- string_db$post_payload(mapped$STRING_id, colors=mapped$color )
string_db$plot_network(hits, payload_id=payload_id )
In [179]:
clustersList <- string_db$get_clusters(mapped$STRING_id)
options(repr.plot.width=10, repr.plot.height=10)
par(mfrow=c(1,1))
for(i in seq(5:5)){
    tmp<-as.data.frame(clustersList[[i]])
    colnames(tmp)<-c("STRING_id")
    tmp<-merge(tmp,mapped)
    payload_id <- string_db$post_payload(tmp$STRING_id, colors=tmp$color )
    string_db$plot_network(tmp$STRING_id, payload_id=payload_id)
 }
In [178]:
library(pheatmap)
#corGeneList<-subset(corMatRetina, p.value.Combined<0.05 & estimate.Combined<0 & grepl("Slc",gene))$gene
corGeneList<-c("App", "Babam1", "Braf", "Cdkn1a", "Clk2", "Crb1", "Cry2", "Eif2ak4", "Fbxl17", "Grin1", 
               "Jun", "Mtor", "Net1", "Pde8b", "Ppp1cc", "Sdf4", "Tlk2")

#tmp1<-t(apply(subset(merged203, select=c(subset(manifest,Group=="IR")$Retina_rna), external_gene_name %in% corGeneList),1,scale))
#tmp2<-t(apply(subset(merged203, select=paste0("ratio_",c(subset(manifest,Group=="IR")$Retina_meth)), external_gene_name %in% corGeneList),1,scale))

tmp1<-subset(merged203, select=c(subset(manifest,Group=="IR")$Retina_rna), external_gene_name %in% corGeneList)
tmp2<-subset(merged203, select=paste0("ratio_",c(subset(manifest,Group=="IR")$Retina_meth)), external_gene_name %in% corGeneList)

tmpMerged<-as.data.frame(cbind(tmp1,tmp2))
colnames(tmpMerged)<-c(paste0(subset(manifest,Group=="IR")$Subject,"_",subset(manifest,Group=="IR")$Retina_rna),
                      paste0(subset(manifest,Group=="IR")$Subject,"_",subset(manifest,Group=="IR")$Retina_meth))

labels<-as.data.frame(colnames(tmpMerged))
colnames(labels)<-c("Sample")
labels$Subject<-c(subset(manifest,Group=="IR")$Subject,subset(manifest,Group=="IR")$Subject)
labels$Group<-ifelse(grepl("Mmus",labels$Sample),"RNA","RRBS")
labels <- labels %>% column_to_rownames(var="Sample")

tmpMerged<-tmpMerged[ , order(names(tmpMerged))]
rownames(tmpMerged)<-NULL
rownames(tmpMerged)<-paste0(subset(merged203, external_gene_name %in% corGeneList)$external_gene_name,"\n",
                           subset(merged203, external_gene_name %in% corGeneList)$ensembl_gene_id)

tmpMerged[, "sd"] <- apply(tmpMerged, 1, sd)
tmpMerged<-subset(tmpMerged,sd>0)
tmpMerged<-subset(tmpMerged,select= -c(sd))
dim(tmpMerged)
  1. 17
  2. 10
In [179]:
tmpMerged
A data.frame: 17 × 10
M75_CFG1831M75_Mmus_C57_6J_RTN_HLLC_IR_4mon_Rep1_M75M80_CFG1835M80_Mmus_C57_6J_RTN_HLLC_IR_4mon_Rep2_M80M82_CFG1836M82_Mmus_C57_6J_RTN_HLLC_IR_4mon_Rep3_M82M84_CFG1838M84_Mmus_C57_6J_RTN_HLLC_IR_4mon_Rep4_M84M89_CFG1842M89_Mmus_C57_6J_RTN_HLLC_IR_4mon_Rep5_M89
<dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
App ENSMUSG000000228920.8977272712.6320020.7135678413.0250150.8560000012.6227860.8412698412.7136410.8541666712.723010
Babam1 ENSMUSG000000318200.93162393 9.4019820.92783505 9.4509610.95428571 9.3246190.92786885 9.4570860.92376682 9.448115
Braf ENSMUSG000000024130.0555555611.3545300.0400000011.2252280.0526315811.3904900.0400000011.2104350.0250000011.194192
Cdkn1a ENSMUSG000000230670.84536082 4.8355570.78308824 5.5552000.93362832 4.0420310.79220779 4.8713960.82876712 4.958092
Clk2 ENSMUSG000000689170.90950226 9.4100640.96815287 9.2419800.91176471 9.4029610.87331536 9.5663750.97267760 9.211033
Crb1 ENSMUSG000000636810.9333333310.6472860.9565217410.4587520.9130434810.6495460.9074074110.6790260.9615384610.530957
Cry2 ENSMUSG000000687420.0000000010.5897470.0000000010.5297400.0000000010.5516250.0112359610.4497330.0000000010.574828
Eif2ak4 ENSMUSG000000051020.51898734 8.9007080.33875339 8.7481900.52411576 8.8221500.39871383 8.8007690.26726727 8.687011
Fbxl17 ENSMUSG000000239650.06250000 9.1670590.05844156 9.0704940.08227848 9.2893780.02431611 9.0181460.04964539 9.080537
Grin1 ENSMUSG000000269590.7596153810.7067250.6618911210.5396020.7194444410.6460680.6575091610.6023940.7357513010.713395
Jun ENSMUSG000000526840.05128205 9.1598880.04444444 9.1137910.00000000 8.9630030.01315789 8.9870310.04918033 9.121517
Mtor ENSMUSG000000289910.1250000011.1729140.2045454511.1861610.0779220811.0582650.1041666711.0805300.1896551711.192819
Net1 ENSMUSG000000212150.07142857 7.8630900.10344828 7.8425990.04838710 7.7463170.01428571 7.5247090.00000000 7.622799
Pde8b ENSMUSG000000216840.2051282110.7455650.1117478510.4008340.1550387610.6590690.1247484910.3875500.1333333310.423866
Ppp1cc ENSMUSG000000044550.0526315811.6701570.0434782611.7738120.0975609811.4995550.0134228211.8231140.0468750011.715357
Sdf4 ENSMUSG000000290760.0000000011.1483650.0909090911.0721430.1111111111.0103710.0256410311.0976250.0294117611.162984
Tlk2 ENSMUSG000000206940.2418300710.4950980.4181818210.3203910.2941176510.4203390.2013605410.4648690.2652519910.461974
In [181]:
options(repr.plot.width=6, repr.plot.height=5)
pheatmap(tmpMerged,fontsize = 14,annotation_col=labels,
        show_rownames=TRUE,show_colnames=FALSE,cluster_cols=FALSE,cluster_rows=FALSE,
        clustering_distance_rows = "correlation",clustering_distance_cols = "correlation",
        border_color = NA, main="+ve, Combined")
In [174]:
corGeneList<-subset(corMatMeth, p.value.IR<0.05 & estimate.IR<0)$gene

tmp1<-t(apply(subset(mergedMeth, select=paste0("ratio_",c(subset(manifest,Group=="IR")$Brain_meth)), external_gene_name %in% corGeneList),1,scale))
tmp2<-t(apply(subset(mergedMeth, select=paste0("ratio_",c(subset(manifest,Group=="IR")$Retina_meth)), external_gene_name %in% corGeneList),1,scale))

tmpMerged<-as.data.frame(cbind(tmp1,tmp2))
colnames(tmpMerged)<-c(paste0(subset(manifest,Group=="IR")$Subject,"_",subset(manifest,Group=="IR")$Brain_meth),
                      paste0(subset(manifest,Group=="IR")$Subject,"_",subset(manifest,Group=="IR")$Retina_meth))

labels<-as.data.frame(colnames(tmpMerged))
colnames(labels)<-c("Sample")
labels$Subject<-c(subset(manifest,Group=="IR")$Subject,subset(manifest,Group=="IR")$Subject)
labels$Group<-c(rep("BRN",length(subset(manifest,Group=="IR")$Brain_meth)), rep("RTN",length(subset(manifest,Group=="IR")$Retina_meth)))
labels <- labels %>% column_to_rownames(var="Sample")

tmpMerged<-tmpMerged[ , order(names(tmpMerged))]
rownames(tmpMerged)<-NULL
rownames(tmpMerged)<-paste0(subset(mergedMeth, external_gene_name %in% corGeneList)$external_gene_name,"_",
                           subset(mergedMeth, external_gene_name %in% corGeneList)$ensembl_gene_id)

tmpMerged[, "sd"] <- apply(tmpMerged, 1, sd)
tmpMerged<-subset(tmpMerged,sd>0)
tmpMerged<-subset(tmpMerged,select= -c(sd))
dim(tmpMerged)
  1. 213
  2. 10
In [ ]:
options(repr.plot.width=5, repr.plot.height=5)
pheatmap(tmpMerged,fontsize = 16,annotation_col=labels,
        show_rownames=FALSE,show_colnames=FALSE,cluster_cols=FALSE,
        clustering_distance_rows = "correlation",clustering_distance_cols = "correlation",
        border_color = NA, main="-ve, IR")
In [ ]:
simMatrix <- calculateSimMatrix(as.data.frame(oraGO_Combined)$ID,
                                orgdb="org.Mm.eg.db",
                                ont="BP",
                                method="Rel")
    scores <- setNames(-log10(as.data.frame(oraGO_Combined)$`p.adjust`), as.data.frame(oraGO_Combined)$ID)
    reducedTerms_Combined <- reduceSimMatrix(simMatrix,
                                scores,
                                orgdb="org.Mm.eg.db")

ggplot(reducedTerms_Combined, aes(area = score, subgroup = parentTerm, fill = parentTerm, label = term)) + 
  geom_treemap(size=0.5, col='white') + ggtitle("Combined") +
  geom_treemap_text(col='gray',place="center",reflow=TRUE) +
  geom_treemap_subgroup_border(col='white',size=1) +
  geom_treemap_subgroup_text(col='white',place="center",fontface="bold",size=12,reflow=TRUE,grow=TRUE,min.size=1) + theme(plot.title = element_text(hjust=0.5,size=10)) +
  guides(fill=F)
preparing gene to GO mapping data...

preparing IC data...

Warning message in calculateSimMatrix(as.data.frame(oraGO_Combined)$ID, orgdb = "org.Mm.eg.db", :
“Removed 1 terms that were not found in orgdb for BP”
Warning message:
“`guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.”